/***************************************************************************** * --------------------------------------------------------------------------- * File: * @(#) IndexParser.java * Author: * Ning Zhang * nzhang@systemsbiology.org * ****************************************************************************** * * * * This software is provided ``AS IS'' and any express or implied * warranties, including, but not limited to, the implied warranties of * merchantability and fitness for a particular purpose, are disclaimed. In * no event shall the authors or the Institute for Systems Biology liable * for any direct, indirect, incidental, special, exemplary, or * consequential damages (including, but not limited to, procurement of * substitute goods or services; loss of use, data, or profits; or business * interruption) however caused and on any theory of liability, whether in * contract, strict liability, or tort (including negligence or otherwise) * arising in any way out of the use of this software, even if advised of * the possibility of such damage. * ******************************************************************************/ package org.systemsbiology.jrap.stax; import javax.xml.stream.*; import java.io.*; import java.util.*; /** * dhmay 20091021: * -Incorporating Vagisha's changes to close files, merging in my changes for mzML 1.1 support * -adding a debug flag for messages that probably don't need to be displayed every time */ public class IndexParser { String inputMZXMLfilename; Map<Integer, Long> offsetMap = new HashMap<Integer, Long>(10000); //for mzML long chrogramIndex=-1; int maxScan = -1; int currentScan = -1; //Is the file mzXML or mzML? boolean isXML = false; boolean isML = false; //a flag for verbose logging protected boolean debug = false; public IndexParser(String inputMZXMLfilename) { this.inputMZXMLfilename = inputMZXMLfilename; //determine whether the file is mzXML or mzML? This is a bit hokey if(inputMZXMLfilename.contains("mzXML")) isXML = true; else isML = true; } public Map<Integer, Long> getOffsetMap() { if (debug) System.out.println("offset size "+offsetMap.size()); return offsetMap; } public long getChrogramIndex() { if (debug) System.out.println("chrogramIndex "+chrogramIndex); return chrogramIndex; } public int getMaxScan() { if (debug) System.out.println("maxScan "+maxScan); return maxScan; } private long getIndexPosition() { FileInputStream fileIN = null; long indexPosition = -1; File tmpXML = null; //mzXML and mzML have different element name String indexName = null; if(isXML) indexName = "indexOffset"; else indexName = "indexListOffset"; try { tmpXML = new File(inputMZXMLfilename); //System.out.println(inputMZXMLfilename +" length is "+ tmpXML.length()); fileIN = new FileInputStream(inputMZXMLfilename); fileIN.skip(tmpXML.length() - 500); byte[] bytes = new byte[500]; int bytesRead = fileIN.read(bytes); String footer = new String(bytes, 0, bytesRead); int offset; if ((offset = footer.indexOf("<"+indexName+">")) == -1) { System.err.println("<"+indexName+">"+" not found!!!"); } footer = footer.substring(offset + indexName.length()+2); int endIndex = footer.indexOf("</"+indexName+">"); if (endIndex == -1) { System.err.println("</"+indexName+"> not found!!!"); } footer = footer.substring(0, endIndex); indexPosition = Long.parseLong(footer); fileIN.close(); if (debug) System.out.println("indexPosition is "+indexPosition); } catch (Exception e) { System.out.println("exception:" + e); e.printStackTrace(); } finally { if(fileIN != null) { try { fileIN.close(); } catch (IOException e) { e.printStackTrace(); } } } return indexPosition; } public void parseIndexes() { FileInputStream fileIN = null; XMLStreamReader xmlSR = null; try{ long indexPos = getIndexPosition(); // System.out.println("indexPos "+indexPos); fileIN = new FileInputStream(inputMZXMLfilename); fileIN.skip(indexPos); XMLInputFactory inputFactory = XMLInputFactory.newInstance(); xmlSR = inputFactory.createXMLStreamReader(fileIN); String elementName = null; StringBuffer indexBuffer = new StringBuffer(); boolean inOffSet = false; //for mzML boolean inSpec = false; boolean inChrogram = false; while(xmlSR.hasNext()) { int event = xmlSR.next(); if(event == xmlSR.START_ELEMENT) { elementName = xmlSR.getLocalName(); // System.out.println("elementName "+elementName); if(elementName.equals("index")) { if(isML) { if((xmlSR.getAttributeValue(null,"name")).equals("spectrum")) inSpec = true; if((xmlSR.getAttributeValue(null,"name")).equals("chromatogram")) { inSpec = false; inChrogram = true; } } } if(elementName.equals("offset")) { if(indexBuffer.length()>0) indexBuffer.delete(0,indexBuffer.capacity()); inOffSet = true; if(isXML) currentScan = Integer.parseInt(xmlSR.getAttributeValue(null, "id")); else if(inSpec) //dhmay changing from the "nativeID" attribute due to mzML 1.1 change. 1.1 format seems to be //"scan=<scannum>" as the "idRef" attribute value, but there may be extra name-value pairs in there { currentScan = parseScanNumberFromOffsetIdrefField(xmlSR.getAttributeValue(null, "idRef")); } } } if(event == xmlSR.CHARACTERS) { if(inOffSet) indexBuffer.append(xmlSR.getText()); } if(event == xmlSR.END_ELEMENT) { elementName = xmlSR.getLocalName(); if(elementName.equals("offset")) { long offset = Long.valueOf(indexBuffer.toString()); if(inChrogram) chrogramIndex = offset; else { assert currentScan != -1 : "Did not find the scan number associated with offset " + indexBuffer.toString(); //System.out.println("index "+indexBuffer.toString()); offsetMap.put(currentScan, offset); maxScan = currentScan; currentScan = -1; indexBuffer.delete(0,indexBuffer.capacity()); inOffSet = false; } } if(elementName.equals("index")) { if(isXML) throw new XMLStreamException("IndexEndFoundException"); else if(inChrogram) throw new XMLStreamException("IndexEndFoundException"); } } } } catch(Exception e) { if(!(e.getMessage()).equals("IndexEndFoundException")) e.printStackTrace(System.err); } finally { if(xmlSR != null) { try { xmlSR.close(); } catch (XMLStreamException e) { e.printStackTrace(); } } if(fileIN != null) { try { fileIN.close(); } catch (IOException e) { e.printStackTrace(); } } } } /** * dhmay adding. mzML 1.1 changes the way scan IDs are stored in the index. They are now stored in * the "idRef" attribute of "offset", which is being used to contain multiple name-value pairs; the * name of the name-value pair containing the scan number is "scan", so I'm knocking off everything but that pair. * @param idString * @return */ protected int parseScanNumberFromOffsetIdrefField(String idString) { if (idString.contains("scan=")) idString = idString.substring(idString.indexOf("scan=") + "scan=".length()); if (idString.contains(" ")) idString = idString.substring(0, idString.indexOf(" ")); return Integer.parseInt(idString); } public boolean isDebug() { return debug; } public void setDebug(boolean debug) { this.debug = debug; } }